Primary exercises
- Create tibble
- Create a tibble
exercise_group for a group of individuals with names {Sonja, Steven, Ines, Robert, Tim} with their heights {164, 188, 164, 180, 170}, weights {56.0, 87.0, 54.0, 80.0, 58.5} and frequency of exercise {high, high, low, moderate, low}.
# Build the tibble column by column. Each vector lists the five individuals
# in the same order, so that the values line up row by row.
exercise_group <- tibble(
  name = c("Sonja", "Steven", "Ines", "Robert", "Tim"),
  height = c(164, 188, 164, 180, 170),
  weight = c(56.0, 87.0, 54.0, 80.0, 58.5),
  exercise = c("high", "high", "low", "moderate", "low")
)
# Print the tibble to check the result.
exercise_group
# A tibble: 5 × 4
name height weight exercise
<chr> <dbl> <dbl> <chr>
1 Sonja 164 56 high
2 Steven 188 87 high
3 Ines 164 54 low
4 Robert 180 80 moderate
5 Tim 170 58.5 low
- Update the tibble
exercise_group by adding Ella and Oscar, leaving their height, weight and exercise values missing (NA). Rather than copying the code from (a) and appending the new names, reuse the columns already stored in exercise_group.
# Rebuild the tibble from its own columns: the two new names are appended to
# name, and every other column is padded with two missing values. c() coerces
# the plain NA to the type of the column it is combined with.
exercise_group <- tibble(
  name = c(exercise_group[["name"]], "Ella", "Oscar"),
  height = c(exercise_group[["height"]], rep(NA, 2)),
  weight = c(exercise_group[["weight"]], rep(NA, 2)),
  exercise = c(exercise_group[["exercise"]], rep(NA, 2))
)
exercise_group
# A tibble: 7 × 4
name height weight exercise
<chr> <dbl> <dbl> <chr>
1 Sonja 164 56 high
2 Steven 188 87 high
3 Ines 164 54 low
4 Robert 180 80 moderate
5 Tim 170 58.5 low
6 Ella NA NA <NA>
7 Oscar NA NA <NA>
- Add the
sex variable to exercise_group with values male and female.
# Rebuild the tibble once more: the four existing columns are carried over
# unchanged and a fifth column, sex, is appended with one value per row,
# given in row order.
exercise_group <- tibble(
  name = exercise_group[["name"]],
  height = exercise_group[["height"]],
  weight = exercise_group[["weight"]],
  exercise = exercise_group[["exercise"]],
  sex = c("female", "male", "female", "male", "male", "female", "male")
)
exercise_group
# A tibble: 7 × 5
name height weight exercise sex
<chr> <dbl> <dbl> <chr> <chr>
1 Sonja 164 56 high female
2 Steven 188 87 high male
3 Ines 164 54 low female
4 Robert 180 80 moderate male
5 Tim 170 58.5 low male
6 Ella NA NA <NA> female
7 Oscar NA NA <NA> male
- Create a tibble which keeps track of the smoking habits over the years of three individuals:
Julio (age 21) started smoking at age 17 and stopped in 2020, Camille (age 20) started smoking in 2021, and Travis (age 19) started at age 16.
# Tabulate the information first. A missing stop year (NA) is read as
# "not stopped", i.e. still smoking to the present date. Where only the
# starting age is given, the starting year is 2022 - (age - starting age).
#
#   name     age  start            stop
#   Julio    21   2022 - (21-17)   2020
#   Camille  20   2021             NA
#   Travis   19   2022 - (19-16)   NA
tibble(
  name = c("Julio", "Camille", "Travis"),
  age = c(21, 20, 19),
  start = c(2022 - (21 - 17), 2021, 2022 - (19 - 16)),
  stop = c(2020, NA, NA)
)
# A tibble: 3 × 4
name age start stop
<chr> <dbl> <dbl> <dbl>
1 Julio 21 2018 2020
2 Camille 20 2021 NA
3 Travis 19 2019 NA
tibble subset
- Take the tibble
exercise_group from the previous exercise and create a new tibble exercise_group_sub without the height and weight variables, selecting the columns with the [ operator.
# Select the columns to keep by name with `[`; the result is again a tibble.
# NOTE(review): exercise_group also holds the sex column added earlier, so
# keeping only name and exercise drops sex as well as height and weight.
# Confirm that this is intended.
exercise_group_sub <- exercise_group[, c("name", "exercise")]
exercise_group_sub
# A tibble: 7 × 2
name exercise
<chr> <chr>
1 Sonja high
2 Steven high
3 Ines low
4 Robert moderate
5 Tim low
6 Ella <NA>
7 Oscar <NA>
- Create a tibble called
exercise_group_sub containing the 1st and 3rd columns of exercise_group.
# Select columns by position with `[`: column 1 is name, column 3 is weight.
exercise_group_sub <- exercise_group[, c(1, 3)]
exercise_group_sub
# A tibble: 7 × 2
name weight
<chr> <dbl>
1 Sonja 56
2 Steven 87
3 Ines 54
4 Robert 80
5 Tim 58.5
6 Ella NA
7 Oscar NA
Read tibbles from file
- Read the
pulse.csv data set into R and inspect its dimensions.
pulse <- read_csv("pulse.csv")
# The dimensions can be inspected in two ways: (i) one at a time with the
# nrow() and ncol() functions, or (ii) both together with the dim() function.
# Number of rows:
nrow(pulse)
[1] 110
# Number of columns:
ncol(pulse)
[1] 13
# Both dimensions at once, in the order (rows, columns):
dim(pulse)
[1] 110 13
- Read the
survey.csv data set into R.
survey <- read_csv("survey.csv")
# Check the dimensions (rows, columns) of the data just read.
dim(survey)
[1] 233 13
- Show the first 9 and the last 7 rows.
# First 9 rows of the survey data.
head(survey, n = 9)
# A tibble: 9 × 13
name gender span1 span2 hand fold pulse clap exerc…¹ smokes height m.i age
<chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl>
1 Alyson female 18.5 18 right right 92 left some never 173 metr… 18.2
2 Todd male 19.5 20.5 left right 104 left none regul 178. impe… 17.6
3 Gerald male 18 13.3 right left 87 neit… none occas NA <NA> 16.9
4 Robert male 18.8 18.9 right right NA neit… none never 160 metr… 20.3
5 Dustin male 20 20 right neit… 35 right some never 165 metr… 23.7
6 Abby female 18 17.7 right left 64 right some never 173. impe… 21
7 Andre male 17.7 17.7 right left 83 right freq never 183. impe… 18.8
8 Micha… female 17 17.3 right right 74 right freq never 157 metr… 35.8
9 Edward male 20 19.5 right right 72 right some never 175 metr… 19
# … with abbreviated variable name ¹exercise
# Last 7 rows of the survey data.
tail(survey, n = 7)
# A tibble: 7 × 13
name gender span1 span2 hand fold pulse clap exerc…¹ smokes height m.i age
<chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl>
1 Marce… female 18.8 18.5 right right 80 right some never 169 metr… 18.2
2 Jerry male 18 16 right right NA right some never 180. impe… 20.8
3 Jeanne female 18 18 right left 85 right some never 165. impe… 17.7
4 Rosan… female 18.5 18 right left 88 right some never 160 metr… 16.9
5 Tracey female 17.5 16.5 right right NA right some never 170 metr… 18.6
6 Keith male 21 21.5 right right 90 right some never 183 metr… 17.2
7 Celina female 17.6 17.3 right right 85 right freq never 168. metr… 17.8
# … with abbreviated variable name ¹exercise
# Calculate the mean age in the survey data.
mean(survey[["age"]])
[1] 20.35591
- Calculate the mean height in survey data.
# By default mean() returns NA when its first argument, here the variable
# height, contains any missing value (NA), and height does contain some.
# Passing na.rm = TRUE as a second argument changes this behaviour: the
# observations with a missing height are disregarded and the mean is
# calculated over the observations for which the height is available.
mean(survey[["height"]], na.rm = TRUE)
[1] 172.3459